package com.larry.spark.rdd.transform

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

/**
 * Spark driver that counts the lines of `data/apache.log` per time bucket and prints
 * the `(bucket, count)` pairs in ascending key order.
 *
 * The bucket key is taken from the fourth space-separated field of each line: that field
 * is split on ':' and its second piece is used as the key.
 * NOTE(review): presumably the field is a timestamp such as `dd/MM/yyyy:HH:mm:ss`, which
 * would make the key the hour of day — confirm against the actual log format.
 */
object RDD_Oper_Test_2 {

  /**
   * Entry point: runs the count locally on all available cores (`local[*]`).
   *
   * @param args command-line arguments (not used)
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local[*]").setAppName("rdd")
    val sc = new SparkContext(conf)

    // Always stop the context, even when the job fails (e.g. missing file, malformed line).
    try {
      val lines = sc.textFile("data/apache.log")

      // Emit (key, 1) per line. A line with fewer than four space-separated fields, or
      // whose fourth field contains no ':', fails here with ArrayIndexOutOfBoundsException.
      val keyedOnes: RDD[(String, Int)] = lines.map { line =>
        val fields = line.split(" ")
        val timeParts = fields(3).split(":")
        (timeParts(1), 1)
      }

      // reduceByKey pre-aggregates on each partition before the shuffle, so only one
      // partial count per key and partition is moved instead of every single record.
      // Keys are strings, so the ordering below is lexicographic.
      val counts: RDD[(String, Int)] = keyedOnes.reduceByKey(_ + _).sortBy(_._1)

      counts.collect().foreach(println)
    } finally {
      sc.stop()
    }
  }
}
